package com.wsc.crawler.grabber;

import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.wsc.crawler.init.CrawlerConfig;

/**
 * Picks WSDL links out of a list of hyperlinks by testing each URL against
 * the regular expression returned by
 * {@link CrawlerConfig#getWsdl_Filter_Expression()}.
 */
public class Filter {

	// Not read or written anywhere in this class; left package-visible in
	// case other code in the package relies on it.
	List<String> wsdlurls = new ArrayList<String>();

	/** Source of the WSDL filter expression; must be set via {@link #setConfig}. */
	private CrawlerConfig config;

	/**
	 * Returns the URLs whose textual form contains a match for the configured
	 * WSDL filter expression.
	 *
	 * <p>The expression is applied with {@link Matcher#find()}, so it may match
	 * any part of the URL; an expression that is meant to cover the whole URL
	 * should be anchored with {@code ^} and {@code $}.
	 *
	 * @param urls hyperlinks to examine; may be {@code null} or contain
	 *             {@code null} elements, which are ignored
	 * @return a new list holding the matching URLs in their original order;
	 *         never {@code null}, and the input list is not modified
	 * @throws IllegalStateException if no configuration has been set or the
	 *         configuration provides no filter expression
	 * @throws java.util.regex.PatternSyntaxException if the configured
	 *         expression is not a valid regular expression
	 */
	public List<URL> filterWSDLUrls(List<URL> urls) {
		if (config == null) {
			throw new IllegalStateException(
					"CrawlerConfig has not been set; call setConfig() first");
		}
		String expression = config.getWsdl_Filter_Expression();
		if (expression == null) {
			throw new IllegalStateException(
					"No WSDL filter expression is available from the configuration");
		}

		// Compile before looking at the input so that a broken expression is
		// reported even when there is nothing to filter.
		Pattern pattern = Pattern.compile(expression);

		List<URL> matched = new ArrayList<URL>();
		if (urls == null) {
			return matched;
		}
		for (URL url : urls) {
			if (url == null) {
				continue;
			}
			Matcher matcher = pattern.matcher(url.toString());
			if (matcher.find()) {
				matched.add(url);
			}
		}
		return matched;
	}

	/**
	 * Sets the configuration that supplies the WSDL filter expression.
	 *
	 * @param config configuration to read the expression from
	 */
	public void setConfig(CrawlerConfig config) {
		this.config = config;
	}

	/**
	 * Manual smoke test: builds a configuration, calls
	 * {@code loadDefaultXmlFile()} on it, and runs the filter over an empty
	 * list, which still compiles the configured expression.
	 *
	 * @param s command-line arguments (unused)
	 */
	public static void main(String... s) {
		Filter f = new Filter();
		CrawlerConfig config = new CrawlerConfig();
		config.loadDefaultXmlFile();

		f.setConfig(config);

		List<URL> result = f.filterWSDLUrls(new ArrayList<URL>());
		System.out.println("Matched WSDL urls: " + result.size());
	}

}
